# -*- coding: UTF-8 -*-
# 常用工具类
import re
from scrapy.utils.project import get_project_settings
from scrapy.exceptions import IgnoreRequest
from zc_core.util.file_reader import read_text


# 补全http链接
def fill_link(url, base='', protocol='http'):
    """Complete a partial link into an absolute URL.

    Surrounding whitespace is removed *before* the link is inspected, so the
    prefix checks and the returned value always refer to the same text.

    Args:
        url: Link to complete. Falsy values (``None``, ``''``) are returned
            unchanged.
        base: Prefix prepended to root-relative links such as ``/path``.
            When empty, such links are returned without a prefix.
        protocol: Scheme used for protocol-relative links (``//host/path``)
            and for bare links (``host/path``).

    Returns:
        The completed link with surrounding whitespace removed.
    """
    if not url:
        return url

    link = url.strip()
    if not link:
        # Whitespace only: there is nothing to complete.
        return link

    if link.startswith('//'):
        # Protocol-relative link: only the scheme is missing.
        return protocol + ':' + link
    if link.startswith('/'):
        # Root-relative link: can only be completed when a base is known.
        return base + link if base else link
    if not link.startswith('http'):
        # Bare "host/path" link; anything starting with "http" is treated
        # as already carrying a scheme.
        return protocol + '://' + link
    return link


# 匹配链接主体部分，除去参数和锚点
def get_main_url(url):
    """Return the main part of a URL, without query string and fragment.

    Example:
        ``http://host/img/a.jpg?1479872247#w`` -> ``http://host/img/a.jpg``

    Args:
        url: URL to trim. Falsy values (``None``, ``''``) are returned
            unchanged.

    Returns:
        The text before the first ``?`` or ``#`` with surrounding whitespace
        removed. A URL containing neither character is returned unchanged.
    """
    if not url:
        return url
    if '?' not in url and '#' not in url:
        return url

    # Cut at whichever delimiter comes first. Handling '?' and '#' in one
    # pass also covers "path#fragment?x", where cutting only at '?' would
    # leave the fragment in the result.
    return re.split(r'[?#]', url.strip(), maxsplit=1)[0].strip()


# cookie转json
def parse_cookie(c):
    """Parse a ``Cookie`` header string into a dict.

    Example:
        ``'a=1; b=x=2'`` -> ``{'a': '1', 'b': 'x=2'}``

    Args:
        c: Cookie string of ``name=value`` pairs separated by ``;``.
            May be ``None`` or empty.

    Returns:
        A dict mapping cookie names to values. Only the first ``=`` of a
        pair separates name from value, so values may contain ``=``. A pair
        without ``=`` is stored with an empty value, pairs with an empty
        name are skipped, and when a name occurs more than once the last
        occurrence wins.
    """
    cookies = {}
    if not c:
        return cookies

    # Split on ';' alone and strip each part, so that "a=1;b=2" and a
    # trailing ';' are handled the same way as the "; "-separated form.
    for pair in c.split(';'):
        name, _, value = pair.partition('=')
        name = name.strip()
        if not name:
            continue
        cookies[name] = value.strip()
    return cookies


# 默认header解析
def parse_header(c):
    """Parse raw ``Name: value`` header lines into a dict.

    Args:
        c: Header text with one ``Name: value`` pair per line. When falsy,
            the text is obtained from ``read_text('./headers.txt')``
            instead (presumably the contents of that file - confirm
            against ``read_text``).

    Returns:
        A dict mapping header names to values, both stripped of surrounding
        whitespace. Blank lines and lines without a name/value separator
        are skipped; a header with nothing after the colon gets an empty
        value.
    """
    headers = {}
    if not c:
        # No text supplied: fall back to the default headers file.
        c = read_text('./headers.txt')
    if not c:
        return headers

    for raw_line in c.split('\n'):
        line = raw_line.strip()
        # Search from index 1 so that the leading ':' of HTTP/2
        # pseudo-headers (":authority: host") stays part of the name.
        colon = line.find(':', 1)
        if colon == -1:
            # Blank or malformed line (e.g. a request line): ignore it.
            continue
        headers[line[:colon].strip()] = line[colon + 1:].strip()
    return headers


# 解析IP:PORT
def match_proxy(text):
    """Extract the first ``IP:PORT`` pair found in a piece of text.

    Args:
        text: Text that may contain an IPv4 address followed by ``:`` and a
            port. May be ``None`` or empty.

    Returns:
        The first ``IP:PORT`` substring, or ``None`` when there is none.
        Each octet must be in the range 0-255 and the port must have one to
        five digits.
    """
    if not text:
        return None

    # Octets use the class [01]; the port needs at least one digit so that
    # a bare "1.2.3.4:" is not reported as a proxy.
    found = re.search(
        r'(?:(?:[01]?\d?\d|2[0-4]\d|25[0-5])\.){3}'
        r'(?:[01]?\d?\d|2[0-4]\d|25[0-5]):\d{1,5}',
        text,
    )
    return found.group(0) if found else None


# 组装重试请求
def retry_request(request, priority_adjust=50):
    """Build a retry copy of ``request``.

    The retry counter lives in ``request.meta['validate_retry_times']``. A
    missing counter is treated as 1 for the limit check and as 0 when it is
    incremented on the copy.

    Args:
        request: Request to retry; must expose ``meta``, ``copy()`` and
            ``priority``.
        priority_adjust: Amount subtracted from the original priority for
            the retry copy.

    Returns:
        A copy of ``request`` with the counter incremented, ``dont_filter``
        set to ``True`` and the priority lowered, or ``None`` when
        ``request.meta`` is empty.

    Raises:
        IgnoreRequest: When the counter exceeds the ``RETRY_TIMES`` setting
            (2 when the setting is absent).
    """
    meta = request.meta
    if not meta:
        return None

    project_settings = get_project_settings()
    attempts_so_far = meta.get('validate_retry_times', 1)
    if attempts_so_far > project_settings.getint('RETRY_TIMES', 2):
        raise IgnoreRequest('重试超限放弃：%s' % request)

    clone = request.copy()
    # The counter is re-read from the copy, starting from 0 when absent.
    if clone and clone.meta and 'validate_retry_times' in clone.meta:
        previous = clone.meta.get('validate_retry_times', 0)
    else:
        previous = 0
    clone.meta['validate_retry_times'] = previous + 1
    clone.dont_filter = True
    clone.priority = request.priority - priority_adjust
    return clone


# # 测试
if __name__ == '__main__':
    # Manual smoke checks; uncomment a line to try the corresponding helper.
    # print(read_rows('../url_map.txt'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669?1479872247#w'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669?147987?2247#w#c'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669?147987'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669#147987'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669.jpg?1479872247#w'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669.jpg?1479872247#w?xx'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669.jpg?1479872247'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669.jpg?1479872247?xxa'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669.jpg#aa'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669.jpg#aa#xx'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669.jpg'))
    # print(get_main_url('http://img.zydmall.com/public/images/95/fd/6e/87d3acc3a3b3b35094139e6f23576aca0c1a0669'))

    # print(parse_header(read_text('./headers.txt')))

    # Sample Cookie header; JSESSIONID appears twice on purpose.
    sample_cookie = (
        'JSESSIONID=feed37ee12b54f2501578bb1d87a; '
        'centralSessionId=_userId-d9486829ed4e426da1f5743914df635a_f6485ef11eee4aadb904763efdf27b36; '
        'gr-web-8080=44799.6483.194.8051.0000; '
        'gr-web-oscp=44799.6486.194.8155.0000; '
        'gr-web3-8080=44799.6746.194.8054.0000; '
        'JSESSIONID=ff07204081074d95627c269f9bbf; '
        'WT_FPC=id=25cfdfef2876337c45a1585915656842:lv=1585915878182:ss=1585915656842'
    )
    print(parse_cookie(sample_cookie))

    # print(match_proxy('39.69.69.118:4284'))
    # print(match_proxy('err'))
